C/C++ Users Group Library 1996 July

home *** CD-ROM | disk | FTP | other *** search

/ C/C++ Users Group Library 1996 July / C-C++ Users Group Library July 1996.iso / vol_300 / 333_02 / awktab.y < prev next >

Wrap

Text File | 1989-02-18 | 30KB | 1,293 lines

/***************************************************************************/
/* gawk -- GNU version of awk                                              */
/* YACC input file to create the gAWK semantic parser                      */
/*                                                                         */
/* Copyright (C) 1986 Free Software Foundation                             */
/* Written by Paul Rubin, August 1986                                      */
/*                                                                         */
/***************************************************************************/
/*                                                                         */
/* GAWK is distributed in the hope that it will be useful, but WITHOUT ANY */
/* WARRANTY. No author or distributor accepts responsibility to anyone     */
/* for the consequences of using it or for whether it serves any           */
/* particular purpose or works at all, unless he says so in writing.       */
/* Refer to the GAWK General Public License for full details.              */
/*                                                                         */
/* Everyone is granted permission to copy, modify and redistribute GAWK,   */
/* but only under the conditions described in the GAWK General Public      */
/* License. A copy of this license is supposed to have been given to you   */
/* along with GAWK so you can know your rights and responsibilities. It    */
/* should be in a file named COPYING. Among other things, the copyright    */
/* notice and this notice must be preserved on all copies.                 */
/*                                                                         */
/* In other words, go ahead and share GAWK, but don't try to stop          */
/* anyone else from sharing it farther. Help stamp out software hoarding!  */
/*                                                                         */
/***************************************************************************/

/* ----------------------------------------------------------------------- */
/* C prologue: copied verbatim into the generated parser.                  */
/* ----------------------------------------------------------------------- */
%{
/* A non-zero YYDEBUG asks yacc/bison to compile its parser trace code in. */
#define YYDEBUG 12

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include "awk.h"

/* Prototypes for two helpers used by this file.  STATIC, NEAR and PASCAL  */
/* are not defined here -- presumably macros supplied by awk.h.            */
STATIC int NEAR PASCAL yylex(void);
STATIC int NEAR PASCAL parse_escape(char **string_ptr);

/* The following variable is used for a very sickening thing. The awk      */
/* language uses white space as the string concatenation operator, but     */
/* having a white space token that would have to appear everywhere in all  */
/* the grammar rules would be unbearable. It turns out we can return       */
/* CONCAT_OP exactly when there really is one, just from knowing what      */
/* kinds of other tokens it can appear between (namely, constants,         */
/* variables, or close parentheses). This is because concatenation has     */
/* the lowest priority of all operators. want_concat_token is used to      */
/* remember that something that could be the left side of a concat has     */
/* just been returned. If anyone knows a cleaner way to do this (don't     */
/* look at the Un*x code to find one, though), please suggest it.          */
/*                                                                         */
/* NOTE(review): the precedence table further down places CONCAT_OP above  */
/* ASSIGNOP, ',', '?' ':', LEX_OR and LEX_AND, so "lowest priority of all  */
/* operators" does not match the declarations as written -- confirm which  */
/* one is intended.                                                        */
static int want_concat_token;

/* Two more horrible kludges. The same comment applies to these two too    */
static int want_regexp = 0;     /* lexical scanning kludge; raised and     */
                                /* cleared by the `regexp' rule below      */
int lineno = 1;                 /* JF for error msgs */

/* During parsing of a gAWK program, the pointer to the next character     */
/* is in this variable.                                                    */
char *lexptr;
/* Not described in the original; presumably the start of the text that    */
/* lexptr walks through -- TODO confirm against the lexer.                 */
char *lexptr_begin;
%}

/* ----------------------------------------------------------------------- */
/* Semantic value type.  Each member is selected by the <tag> used in the  */
/* %type / %token declarations that follow.                                */
/* ----------------------------------------------------------------------- */
%union
{
    long     lval;                      /* ERROR, INCDEC, LEX_AND, LEX_OR,   */
                                        /* INCREMENT, DECREMENT              */
    AWKNUM   fval;                      /* NUMBER                            */
    NODE    *nodeval;                   /* parse-tree nodes built by rules   */
    int      nodetypeval;               /* node type codes: keyword and      */
                                        /* operator tokens, whitespace,      */
                                        /* relop                             */
    char    *sval;                      /* NAME, REGEXP, YSTRING             */
    NODE    *(PASCAL *ptrval)(NODE *);  /* function pointer carried by the   */
                                        /* LEX_*_FUNC / LEX_BUILTIN /        */
                                        /* LEX_GETLINE tokens                */
}

/* Nonterminals whose value is a NODE pointer. */
%type <nodeval>     exp start program rule pattern conditional regexp
%type <nodeval>     action variable redirect_in redirect_out exp_list builtin
%type <nodeval>     statements statement if_statement opt_exp
/* Nonterminals whose value is a node type code. */
%type <nodetypeval> whitespace relop

/* Terminals, grouped by the %union member that carries their value. */
%token <sval>        NAME REGEXP YSTRING
%token <lval>        ERROR INCDEC
%token <fval>        NUMBER
%token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP
%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE
%token <nodetypeval> LEX_WHILE LEX_FOR LEX_BREAK LEX_CONTINUE LEX_DELETE
%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT
%token <nodetypeval> RELOP_EQ RELOP_GEQ RELOP_LEQ RELOP_NEQ REDIR_APPEND
/* LEX_IN is declared without a value type; no visible rule reads its value. */
%token               LEX_IN
%token <lval>        LEX_AND LEX_OR INCREMENT DECREMENT
%token <ptrval>      LEX_BUILTIN LEX_MATCH_FUNC LEX_SUB_FUNC LEX_SPLIT_FUNC
%token <ptrval>      LEX_GETLINE

/* Operator precedence and associativity.  Order matters: each line binds  */
/* tighter than the one before it.                                         */
/* Lowest to highest */
%right    ASSIGNOP
%left     ','
%right    '?' ':'
%left     LEX_OR
%left     LEX_AND
%left     CONCAT_OP
%nonassoc MATCHOP '>' '<' RELOP_EQ RELOP_GEQ RELOP_LEQ RELOP_NEQ
%left     '+' '-'
%left     '*' '/' '%'
/* UNARY is only ever named through %prec (see the '!' alternative of      */
/* `conditional'); it is not declared as a %token in this section.         */
%right    UNARY
%right    '^'

%%

/* Entry point: the finished rule list is stored in expression_value. */
start
    : optional_newlines program
        { expression_value = $2; }
    ;

/* One or more rules, chained together through NODE_RULE_LIST nodes. */
program
    : rule
        { $$ = node($1, NODE_RULE_LIST, NULL); }
    | program rule
        {
            /* cons the rule onto the tail of list */
            $$ = append_right($1, node($2, NODE_RULE_LIST, NULL));
        }
    ;

/* A single `pattern action' pair, terminated by at least one NEWLINE.     */
/* The pattern is passed to node() first and the action last.              */
rule
    : pattern action NEWLINE optional_newlines
        { $$ = node($1, NODE_RULE_NODE, $2); }
    ;

/* Pattern part of a rule: nothing, BEGIN, END, a single condition, or a   */
/* `cond , cond' pair that is wrapped by mkrangenode().                    */
pattern
    : /* empty */
        { $$ = NULL; }
    | LEX_BEGIN
        { $$ = node(NULL, NODE_K_BEGIN, NULL); }
    | LEX_END
        { $$ = node(NULL, NODE_K_END, NULL); }
    | conditional
        { $$ = $1; }
    | conditional ',' conditional
        { $$ = mkrangenode(node($1, NODE_COND_PAIR, $3)); }
    ;

/* Boolean conditions.  Note that both membership forms require a          */
/* CONCAT_OP token between the left operand and LEX_IN, and that they pass */
/* the array variable to node() first and the subscript expression last.   */
conditional
    : '!' conditional %prec UNARY
        { $$ = node($2, NODE_NOT, NULL); }
    | '(' exp_list ')' CONCAT_OP LEX_IN NAME
        { $$ = node(variable($6), NODE_MEMBER_COND, $2); }
    | exp CONCAT_OP LEX_IN NAME
        { $$ = node(variable($4), NODE_MEMBER_COND, $1); }
    | conditional LEX_AND conditional
        { $$ = node($1, NODE_AND, $3); }
    | conditional LEX_OR conditional
        { $$ = node ($1, NODE_OR, $3); }
    | '(' conditional ')'
        {
            $$ = $2;
            /* Clears the concat flag after the ')'; presumably so the     */
            /* lexer does not emit CONCAT_OP after a parenthesised         */
            /* condition -- verify against yylex.                          */
            want_concat_token = 0;
        }
    | regexp
        { $$ = $1; }
    /* For the three forms below the operator's own semantic value ($2)    */
    /* is used directly as the node type.                                  */
    | exp MATCHOP regexp
        { $$ = node($1, $2, $3); }
    | exp MATCHOP variable
        { $$ = node($1, $2, $3); }
    | exp relop exp
        { $$ = node($1, $2, $3); }
    ;

/* Action part of a rule: either absent (NULL) or a braced statement list. */
action
    : /* empty */
        { $$ = NULL; }
    | '{' whitespace statements '}'
        { $$ = $3; }
    ;

/* One or more statements, chained through NODE_STATEMENT_LIST nodes. */
statements
    : statement
        { $$ = node($1, NODE_STATEMENT_LIST, NULL); }
    | statements statement
        { $$ = append_right($1, node($2, NODE_STATEMENT_LIST, NULL)); }
    ;

/* Statement terminator: a NEWLINE or ';', followed by optional newlines.  */
/* statement_term has no %type declaration above, hence the explicit       */
/* $<nodetypeval>$ tag on the result.                                      */
statement_term
    : NEWLINE optional_newlines
        { $<nodetypeval>$ = NODE_ILLEGAL; }
    | ';' optional_newlines
        { $<nodetypeval>$ = NODE_ILLEGAL; }
    ;

/* A /regexp/ literal.  The mid-rule action raises want_regexp right after */
/* the opening '/' is seen and the final action resets it to 0.  Because   */
/* the mid-rule action occupies position $2, the REGEXP token text is $3.  */
/* NOTE(review): this rule has no terminating ';' in the original; yacc    */
/* accepts that, but every other rule here ends with one.                  */
regexp
    : '/'
        { ++want_regexp; }
      REGEXP '/'
        {
            want_regexp = 0;
            $$ = node(NULL, NODE_REGEXP, (NODE *) make_regexp($3));
        }

/* Relational operators, each mapped to its node type code. */
relop
    : '>'
        { $$ = NODE_GREATER; }
    | '<'
        { $$ = NODE_LESS; }
    | RELOP_EQ
        { $$ = NODE_EQUAL; }
    | RELOP_GEQ
        { $$ = NODE_GEQ; }
    | RELOP_LEQ
        { $$ = NODE_LEQ; }
    | RELOP_NEQ
        { $$ = NODE_NOTEQUAL; }
    ;

/* Any run (possibly empty) of CONCAT_OP and NEWLINE tokens.  Only the     */
/* empty alternative has an explicit action; the others rely on yacc's     */
/* default action ($$ = $1).                                               */
whitespace
    : /* blank */
        { $$ = NODE_ILLEGAL; }
    | CONCAT_OP
    | NEWLINE
    | whitespace CONCAT_OP
    | whitespace NEWLINE
    ;

/* A single statement.  Argument order passed to node() differs by form:   */
/* `while' passes the condition first and the body last, whereas the `for' */
/* forms pass the body first and the make_for_loop() result last.          */
statement
    : '{' whitespace statements '}' whitespace
        { $$ = $3; }
    | if_statement
        { $$ = $1; }
    | LEX_WHILE '(' conditional ')' whitespace statement
        { $$ = node($3, NODE_K_WHILE, $6); }
    /* for (init; cond; incr) body */
    | LEX_FOR '(' opt_exp ';' conditional ';' opt_exp ')' whitespace statement
        { $$ = node($10, NODE_K_FOR, (NODE *) make_for_loop($3, $5, $7)); }
    /* for (init; ; incr) body -- condition omitted, NULL passed instead */
    | LEX_FOR '(' opt_exp ';' ';' opt_exp ')' whitespace statement
        { $$ = node($9, NODE_K_FOR, (NODE *) make_for_loop($3, NULL, $6)); }
    /* for (name in array) body -- again with CONCAT_OP before LEX_IN */
    | LEX_FOR '(' NAME CONCAT_OP LEX_IN NAME ')' whitespace statement
        { $$ = node($9, NODE_K_ARRAYFOR, (NODE *) make_for_loop(variable($3), NULL, variable($6))); }
    | LEX_BREAK statement_term
        /* for break, maybe we'll have to remember where to break to */
        { $$ = node(NULL, NODE_K_BREAK, NULL); }
    | LEX_CONTINUE statement_term
        /* similarly */
        { $$ = node(NULL, NODE_K_CONTINUE, NULL); }
    | LEX_PRINT exp_list redirect_out statement_term
        { $$ = node($2, NODE_K_PRINT, $3); }
    | LEX_PRINT '(' exp_list ')' /* BW: print(...) */ {